***************
* This file prepares the main stes dataset
* Author: Daniel Kopp
***************

* Load the raw CSV export; -clear- discards whatever is currently in memory.
import delimited using "data_raw\20220329_SECO2KOF_Tab2_STES.csv" , clear

* Convert the date variables from strings to Stata daily dates.
* Each string is parsed in year-month-day order; values that cannot be
* parsed become missing. The converted variable takes over the original
* name (and is placed at the end of the variable list).
local datevars dat_anmeld dat_abmeld dat_aussteuerung dat_geburts_datum ///
	dat_aufenthalt_bis rf_beginn rf_ende
foreach d of local datevars {
	generate `d'_tmp = date(`d', "YMD")
	drop `d'
	rename `d'_tmp `d'
	format `d' %tdCCYY-NN-DD
}

* Sanity check: show the earliest and latest registration date.
* The extremes are also kept in the locals dat_anmeld_min / dat_anmeld_max.
local date_var "dat_anmeld"
summarize `date_var', meanonly	// meanonly still returns r(min) and r(max)
local `date_var'_min = r(min)
local `date_var'_max = r(max)
display %td ``date_var'_min' 	// 26nov1962
display %td ``date_var'_max' 	// 30jun2021

* Monthly date of registration (months since 1960m1, shown as e.g. 2016m1)
generate dat_anmeld_month = mofd(dat_anmeld)
format %tm dat_anmeld_month

* Sample restriction 1: remove stes who deregistered before 1 Jan 2016.
* A missing deregistration date compares as larger than any date in Stata,
* so rows without a deregistration date are kept.
keep if !(dat_abmeld < mdy(1, 1, 2016))		// 46 observations removed

* Sample restriction 2: remove stes who registered before 1 Jan 2010.
* Rows with a missing registration date are kept for the same reason.
keep if !(dat_anmeld < mdy(1, 1, 2010))		// 704 observations removed

* Replace the NULL marker "\N" by the empty string in every string variable.
* Only string variables are touched: comparing a numeric variable with a
* string is a type mismatch, which the earlier version hid behind a blanket
* -capture- (that would also have hidden any other error in the replace).
quietly ds, has(type string)
local strvars `r(varlist)'
foreach var of local strvars {
	replace `var' = "" if `var' == "\N"
}

* The first work-form pair carries no numeric suffix; give it suffix 1 so that
* all four pairs can be handled in one loop.
rename code_arbeitsform cod_arbeitsform1
rename txt_arbeitsform  txt_arbeitsform1

* One 0/1 flag per work form. The k-th flag in -flags- is switched on when any
* of the four code variables equals the k-th entry of -codes- (the codes are
* compared as strings).
local flags sonn_feiertag schichtarbeit nachtarbeit heimarbeit lehre
local codes 1 2 3 4 9

foreach f of local flags {
	generate `f' = 0
}

forvalues slot = 1/4 {
	forvalues k = 1/5 {
		local f : word `k' of `flags'
		local c : word `k' of `codes'
		replace `f' = 1 if cod_arbeitsform`slot' == "`c'"
	}
	* The raw code/text pair is no longer needed once the flags are set
	drop cod_arbeitsform`slot' txt_arbeitsform`slot'
}

* Remove rows that are identical in every variable
duplicates drop	// 87 dropped. 

* Some stes still have several rows that differ only in their work-form
* preferences (arbeitsformen). Give every row of a stes the same value of each
* flag so that these rows become exact duplicates.
* The within-stes maximum is used so that the flag stays a 0/1 dummy meaning
* "at least one row of this stes has the work form". A within-stes mean would
* turn the flag into a fraction (e.g. 0.5) whenever the rows disagree.
foreach var in sonn_feiertag schichtarbeit nachtarbeit heimarbeit lehre {
	bysort stes_id: egen any_`var' = max(`var')
	drop `var'
	rename any_`var' `var'
}

duplicates drop		// 1,233 dropped. No duplicates any more

* Convert the two benefit variables from string to numeric in place.
* destring leaves a variable untouched if it contains non-numeric characters.
destring hoechstanspruch bezogene_taggelder, replace


* Gender dummies: 0 = "M", 1 = "F", missing for any other value (incl. empty).
* Built for both the rav_ and the cod_ version of the geschlecht variable;
* the string source variable is dropped afterwards.
label define gender_lab 0 "Male" 1 "Female"
foreach src in rav cod {
	generate `src'_gender = cond(`src'_geschlecht == "M", 0, ///
		cond(`src'_geschlecht == "F", 1, .))
	label values `src'_gender gender_lab
	drop `src'_geschlecht
}

* Yes/no dummies: recode the "Y"/"N" strings (after trimming surrounding
* whitespace) to 1/0. Any other value, including the empty string, becomes
* missing. The numeric variable replaces the string variable under the same
* name and ends up at the end of the variable list.
label define yesno 0 "no" 1 "yes", modify
foreach v of varlist oeffentlich geschuetzt geschuetzt_kontaktangaben fahrzeug {
	generate `v'_yn = cond(ustrtrim(`v') == "Y", 1, ///
		cond(ustrtrim(`v') == "N", 0, .))
	drop `v'
	rename `v'_yn `v'
	label values `v' yesno
}


* Store the code-to-text mapping of bausgeuebt and of ausbildungsniveau in
* separate lookup files (one row per code; if a code appears with several
* texts, the first occurrence is kept). The main data are restored after
* each file is written.
* Note: -distinct- is a user-written command.
* Distinct codes reported in the original run: 4984 (avam_bausgeuebt),
* 19 (ausbildungsniveau).
foreach stem in avam_bausgeuebt ausbildungsniveau {
	preserve
		distinct cod_`stem'
		keep cod_`stem' txt_`stem'
		duplicates drop cod_`stem', force
		save "Help_files\label_`stem'.dta", replace
	restore
}

* The text labels were written to the lookup files above, so they are no
* longer needed in the main data
drop txt_avam_bausgeuebt
drop txt_ausbildungsniveau
* Name of the last employer is not kept in the processed data
drop name_letzt_ag

* Number of distinct stes in the final data
distinct stes_id		// 1'886'332

* Write the processed dataset
save "data_processed\stes.dta", replace
